__author__ = 'hsd'

import sys
from operator import add
from pyspark import SparkContext

# Script entry point: count occurrences of each whitespace-separated word in
# the text file named by the single command-line argument and print one
# "word: count" line per distinct word.
#
# The guard must use the double-underscore names (__name__ / "__main__");
# the single-underscore spelling refers to an undefined variable and raises
# NameError before any work is done.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # sys.stderr.write behaves the same on Python 2 and Python 3, unlike
        # the Python 2-only "print >> sys.stderr" statement form.
        sys.stderr.write("Usage: wordcount <file>\n")
        # sys.exit is always available; the bare exit() builtin is injected
        # by the site module and may be missing.
        sys.exit(-1)

    sc = SparkContext(appName="PythonWordCount")
    try:
        # Second argument to textFile is the minimum number of partitions.
        lines = sc.textFile(sys.argv[1], 1)
        counts = (
            lines.flatMap(lambda line: line.split())  # line -> words
            .map(lambda word: (word, 1))              # word -> (word, 1)
            .reduceByKey(add)                         # sum the 1s per word
        )
        # collect() brings every (word, count) pair back to the driver.
        output = counts.collect()

        for (word, count) in output:
            print("%s: %i" % (word, count))
    finally:
        # Always release the SparkContext, even if the job above fails.
        sc.stop()